Download and process the additional mitochondria biomarker data

All external data files used in this code are available at https://github.com/MoTrPAC/motrpac-rat-training-mitochondria/tree/main/data

This script assumes that these files were downloaded to the data_dir path specified above

RNA-seq and cDNA data

# read the mtDNA data file
mt_data = read.csv(
  file=paste0(data_dir,"mtDNA_motrpac_pass1b.csv"),
  stringsAsFactors = F
)
mt_data$bid = mt_data$Label

# read the RNA-seq metrics to obtain mtRNA read percentages
rnaseq_qa_qc = TRNSCRPT_META
pct_mito_reads = rnaseq_qa_qc$pct_chrM
names(pct_mito_reads) = as.character(rnaseq_qa_qc$vial_label)
rownames(rnaseq_qa_qc) = as.character(rnaseq_qa_qc$vial_label)
colnames(rnaseq_qa_qc) = tolower(colnames(rnaseq_qa_qc))

# load the pheno data
pheno = PHENO

Get metab data

cardiolipins_dir = paste0(data_dir,"/cardiolipins/")
cl_files = list.files(cardiolipins_dir,full.names = T)
cl_files = cl_files[!grepl("name_mapping",cl_files)]
cl_data = c()
cl_fdata = c()
for(f in cl_files){
  f_arr = strsplit(f,split="_")[[1]]
  tissue = f_arr[length(f_arr)]
  tissue = gsub(".csv","",tissue)
  tissue = toupper(tissue)
  if(tissue=="MUSCLE"){
    tissue = "SKMGN"
  }
  if(tissue=="hippocampus"){
    tissue = "HIPPOC"
  }
  d = fread(f,stringsAsFactors = F,data.table = F,header=T)
  d = d[!is.na(d[,1]),]
  if(ncol(d)<51){next} # ignore inomplete tissues
  # use bid instead of vial labels
  colnames(d)[-1] = substring(colnames(d)[-1],1,5)
  feature_d = cbind(tissue,d[,1])
  if(length(cl_data)==0){
    cl_data = d
    cl_fdata = feature_d
  }else{
    cl_data = rbind(cl_data,d[,colnames(cl_data)])
    cl_fdata = rbind(cl_fdata,feature_d)
  }
}
cl_data = cl_data[,-1]
colnames(cl_fdata) = c("tissue","feature_ID")
cl_fdata = data.frame(cl_fdata,stringsAsFactors = F)

# transform WATSC and SKMGN to WAT-SC and SKM-GN
# to be consistent with the pheno data and the other
# data objects:
cl_fdata$tissue[cl_fdata$tissue=="SKMGN"] = "SKM-GN"
cl_fdata$tissue[cl_fdata$tissue=="WATSC"] = "WAT-SC"
cl_fdata$tissue[cl_fdata$tissue=="WAT"] = "WAT-SC"
table(cl_fdata$tissue)
## 
##  HEART KIDNEY  LIVER   LUNG SKM-GN WAT-SC 
##     11     16      9      3     11      4
sort(table(cl_fdata$feature_ID),decreasing=T)[1:4]
## 
## CL(72:8)>CL(18:2/18:2/18:2/18:2)                        CL(74:11) 
##                                4                                4 
##                         CL(74:9)                         CL(70:7) 
##                                4                                3
cl_fdata[cl_fdata$feature_ID=="CL(72:8)",]
##    tissue feature_ID
## 43 SKM-GN   CL(72:8)
## 53 WAT-SC   CL(72:8)
selected_cls = unique(cl_fdata$feature_ID)

# # Optional: get cardiolipins from the main landscape paper:
# sample_data = METAB_NORM_DATA_FLAT
# sample_data_featureinfo = sample_data[,1:5]
# # Limit the data to cardiolipins
# cl_inds = grepl("^CL",sample_data$feature_ID,perl = T)
# cl_data = sample_data[cl_inds,]
# cl_fdata = sample_data_featureinfo[cl_inds,]
# # change column names to bids
# m = unique(pheno[,c("pid","bid")])
# p2b = as.character(m[,2])
# names(p2b) = as.character(m[,1])
# colnames(cl_data)[-c(1:5)] = unname(p2b[colnames(cl_data)[-c(1:5)]])

Get mito gene annotation

# Add mito gene annotations
mt = fread(paste0(data_dir,
        "external-datasets_gene-sets_Mitocarta_Rat.MitoCarta3.0.genes_only.txt"),
        stringsAsFactors = F,data.table = F)

# additional pathway enrichment with mitocarta pathways
mt_pw = fread(paste0(data_dir,"Human.MitoCarta3.0.txt"),header = T,
        stringsAsFactors = F,data.table = F)

# map transcripts to genes using biomart
library(biomaRt)
mart_rat = useMart("ensembl", dataset = "rnorvegicus_gene_ensembl")
attrs  = c("ensembl_gene_id","ensembl_peptide_id","rgd_symbol")
rat_prot_gene = getBM(attributes = attrs, mart = mart_rat,  uniqueRows=T)
rat_prot_gene = rat_prot_gene[rat_prot_gene$ensembl_peptide_id!="",]
rat_prot_gene = rat_prot_gene[rat_prot_gene$rgd_symbol != "",]

# read in rat to human mapping
rat_to_human = fread(paste0(data_dir,"RGD_ORTHOLOGS_20201001.txt"),
        stringsAsFactors = F,data.table = F)
rat_to_human = rat_to_human[!is.na(rat_to_human$HUMAN_ORTHOLOG_SYMBOL),]
rat_to_human_map = unique(rat_to_human[,c("RAT_GENE_SYMBOL", "HUMAN_ORTHOLOG_SYMBOL")])

# make a pathway:member list
mitocarta_pathways = list()
for(pw in mt_pw$MitoPathway){
  members = unname(unlist(strsplit(mt_pw[mt_pw$MitoPathway==pw, "Genes"], ', ')))
  rat_members = rat_to_human_map[rat_to_human_map$HUMAN_ORTHOLOG_SYMBOL %in% members, 1]
  rat_members = unique(na.omit(rat_members))
  mitocarta_pathways[[pw]] = rat_members
  #print(paste(pw, length(members), length(rat_members)))
}

save(rat_prot_gene,
     mt,mt_pw,mitocarta_pathways,rat_to_human,
     file=paste0(data_dir,"mito_gene_annotation.RData"))

Load above data:

load(paste0(data_dir,"mito_gene_annotation.RData"))

Tissue comparison at baseline

RNA-Seq

control_vials = rownames(pheno)[pheno$intervention=="control"]
control_vials = intersect(control_vials,names(pct_mito_reads))
baseline_pct_mito = data.frame(
  pct_mito_reads = pct_mito_reads[control_vials],
  tissue = pheno[control_vials,"tissue"],
  sex = pheno[control_vials,"sex"]
)

baseline_pct_mito_male = baseline_pct_mito[baseline_pct_mito$sex == "male",]
baseline_pct_mito_female = baseline_pct_mito[baseline_pct_mito$sex == "female",]

currdf = baseline_pct_mito_male
tissue_med = tapply(currdf$pct_mito_reads,currdf$tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
currdf$color = TISSUE_COLORS[currdf$tissue]
currdf$tissue = factor(currdf$tissue,levels = names(tissue_med))
currdf_male = currdf
currdf_male$sex = "Male"

currdf = baseline_pct_mito_female
tissue_med = tapply(currdf$pct_mito_reads,currdf$tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
currdf$color = TISSUE_COLORS[currdf$tissue]
currdf$tissue = factor(currdf$tissue,levels = names(tissue_med))
currdf_female = currdf
currdf_female$sex = "Female"

baseline_pct_mito = rbind(currdf_male,currdf_female)

ggplot(baseline_pct_mito, aes(x=tissue, y=pct_mito_reads,fill=as.character(tissue))) + 
  geom_boxplot() + facet_wrap(~sex) + xlab("") + 
  theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) + 
  scale_fill_manual(
    labels = sort(as.character(unique(baseline_pct_mito$tissue))),
    values = TISSUE_COLORS[sort(as.character(unique(baseline_pct_mito$tissue)))])

mt_dna_copy = mt_data[mt_data$Group=="Ctrl",]
mt_dna_copy$sex = ifelse(mt_dna_copy$sex == "M","Male","Femlae")
tissue_med = tapply(mt_dna_copy$Level,mt_dna_copy$Tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
mt_dna_copy$Tissue = factor(mt_dna_copy$Tissue,levels = names(tissue_med))
ggplot(mt_dna_copy, aes(x=Tissue, y=Level,fill=as.character(Tissue))) + 
  geom_boxplot() + facet_wrap(~sex) + xlab("") + 
  theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) + 
  scale_fill_manual(
    labels = sort(as.character(unique(mt_dna_copy$Tissue))),
    values = TISSUE_COLORS[sort(as.character(unique(mt_dna_copy$Tissue)))])

Comparison of mt biomarkers

Get the biomarker df per tissue

# Get data frames with all mito biomarkers, by tissue
mt_biomarkers = list()
for(tissue_name in unique(pheno$tissue)){
  if(is.na(tissue_name)){next}
  if(grepl("VENA",tissue_name)){next}
  print(paste("Analyzing data from:",tissue_name))
  
  # get all vial labels
  tissue_vials = rownames(pheno)[pheno$tissue == tissue_name]
  # limit to vial labels that appear in the rna-seq data
  tissue_vials = intersect(names(pct_mito_reads),tissue_vials)
  tissue_vials = setdiff(tissue_vials,OUTLIERS$viallabel)
  if(length(tissue_vials)<5){next}
  # get the bids
  tissue_bids = sapply(tissue_vials,substring,1,5)
  # save results in a data frame
  tissue_df = data.frame(
    mt_reads = pct_mito_reads[tissue_vials],
    viallabel = tissue_vials,
    bid = tissue_bids
  )
  
  tissue_df$mtdna = NA
  if(sum(mt_data$Tissue==tissue_name)>0){
    currmt_data = mt_data[mt_data$Tissue==tissue_name,]
    rownames(currmt_data) = as.character(currmt_data$bid)
    curr_shared_bids = intersect(tissue_df$bid,rownames(currmt_data))
    for(bid in curr_shared_bids){
      tissue_df[tissue_df$bid==bid,"mtdna"] = currmt_data[bid,"Level"]
    }
    tissue_df$mtdna = log2(tissue_df$mtdna)
  }
  
  tissue_df$clPC1 = NA
  tissue_df$clPC2 = NA
  tissue_df$clPC3 = NA
  for(cl in selected_cls){tissue_df[[cl]] = NA}
  curr_cl_data = NULL
  if(sum(cl_fdata$tissue == tissue_name)>0){
    curr_shared_bids = intersect(tissue_df$bid,colnames(cl_data))
    curr_cl_data = cl_data[cl_fdata$tissue == tissue_name,curr_shared_bids]
    metab_names = cl_fdata$feature_ID[cl_fdata$tissue == tissue_name]
    curr_cl_data = aggregate(curr_cl_data,by=list(metab_names),mean,na.rm=T)
    rownames(curr_cl_data) = curr_cl_data[,1]
    curr_cl_data = curr_cl_data[,-1]
    # remove columns that are all NAs
    bid_na_rate = colSums(is.na(curr_cl_data)) / nrow(curr_cl_data)
    curr_cl_data = curr_cl_data[,bid_na_rate<1]
    curr_shared_bids = colnames(curr_cl_data)
    # check missing values
    na_rate = sum(is.na(curr_cl_data))/(nrow(curr_cl_data)*ncol(curr_cl_data))
    if(na_rate >= 0.05){
      curr_cl_data = NULL
    }
    else{
      # use min imputation
      curr_cl_data = t(curr_cl_data)
      curr_cl_data[is.nan(curr_cl_data)] = NA
      curr_cl_data[is.na(curr_cl_data)] = min(curr_cl_data,na.rm=T)
    }
    
    if(!is.null(curr_cl_data) && nrow(curr_cl_data)>0){
      # log2 transform
      curr_cl_data = log2(curr_cl_data)
      cl_pca = prcomp(curr_cl_data)
      num_cols = min(3,ncol(curr_cl_data))
      curr_cl_data_raw = curr_cl_data
      curr_cl_data = as.matrix(cl_pca$x[,1:num_cols],ncol=num_cols)
      colnames(curr_cl_data) = paste0("clPC",1:num_cols)
      for(pc in colnames(curr_cl_data)){
        for(bid in curr_shared_bids){
          tissue_df[tissue_df$bid==bid,pc] = curr_cl_data[bid,pc]
        }
      }
      # Add selected CLs
      for(cl in selected_cls){
        if(!cl %in% colnames(curr_cl_data_raw)){next}
        for(bid in curr_shared_bids){
          tissue_df[tissue_df$bid==bid,cl] = curr_cl_data_raw[bid,cl]
        }
      }
    }
  }
  
  # add sex and group, save the df
  tissue_df$group = pheno[tissue_df$viallabel,"group"]
  tissue_df$sex = pheno[tissue_df$viallabel,"sex"]
  mt_biomarkers[[gsub("-","",tissue_name)]] = tissue_df
}
## [1] "Analyzing data from: HYPOTH"
## [1] "Analyzing data from: CORTEX"
## [1] "Analyzing data from: HIPPOC"
## [1] "Analyzing data from: ADRNL"
## [1] "Analyzing data from: LUNG"
## [1] "Analyzing data from: HEART"
## [1] "Analyzing data from: LIVER"
## [1] "Analyzing data from: SKM-GN"
## [1] "Analyzing data from: WAT-SC"
## [1] "Analyzing data from: PLASMA"
## [1] "Analyzing data from: SMLINT"
## [1] "Analyzing data from: OVARY"
## [1] "Analyzing data from: COLON"
## [1] "Analyzing data from: SPLEEN"
## [1] "Analyzing data from: SKM-VL"
## [1] "Analyzing data from: BLOOD"
## [1] "Analyzing data from: BAT"
## [1] "Analyzing data from: KIDNEY"
## [1] "Analyzing data from: TESTES"

Compute statistics

#' Helper function: compute corr matrix from data with NAs
get_corrmat<-function(x){
  n = ncol(x)
  m = matrix(NA,ncol=n,nrow=n,dimnames=list(colnames(x),colnames(x)))
  for(i in 1:n){
    inds1 = !is.na(x[,i])
    if(sum(inds1)<3){next}
    for(j in 1:i){
      inds2 = !is.na(x[,j])
      if(sum(inds2)<3){next}
      inds = inds1 & inds2
      m[i,j] = cor(as.numeric(x[inds,i]),as.numeric(x[inds,j]))
      m[j,i] = m[i,j]
    }
  }
  return(m)
}
#' Helper function for the statistics
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following object is masked from 'package:AnnotationDbi':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
compute_biomarker_dea_res<-function(x,group){
  kw_p = kruskal.test(x,group)$p.value
  df = data.frame(x=x,group=group)
  m = lm(x~0+group,data=df)
  coeffs = coefficients(m)
  curr_groups = gsub("group","",names(coeffs))
  curr_groups = sort(curr_groups[!grepl("control",curr_groups)])
  cons = matrix(0,nrow=length(curr_groups),ncol=length(coeffs),
                dimnames = list(curr_groups,names(coeffs)))
  cons[,which(grepl("control",colnames(cons)))] = -1
  for(r in rownames(cons)){cons[r,grepl(r,colnames(cons))]=1}
  cons_res = glht(m, linfct = cons)
  cons_res = summary(cons_res)
  cons_coeffs = cons_res$test$coefficients
  cons_tstats = cons_res$test$tstat
  cons_names = names(cons_coeffs)
  cons_ps = as.numeric(cons_res$test$pvalues)
  names(cons_coeffs) = paste0("fc_",cons_names)
  names(cons_ps) = paste0("pval_",cons_names)
  names(cons_tstats) = paste0("tstat_",cons_names)
  v = rep(NA,13)
  names(v) = c("kw_p",paste0("fc_",c("1w","2w","4w","8w")),
               paste0("pval_",c("1w","2w","4w","8w")),
               paste0("tstat_",c("1w","2w","4w","8w")))
  v["kw_p"] = kw_p
  v[names(cons_coeffs)] = cons_coeffs
  v[names(cons_ps)] = cons_ps
  v[names(cons_tstats)] = cons_tstats
  return(data.frame(t(v)))
}

library(multcomp)
compute_lm_dea<-function(x,g){
  glevels = sort(unique(g))
  missing_groups = setdiff(c("1w","2w","4w","8w"),glevels)
  df = data.frame(x=x,g=factor(g,levels = glevels))
  m0 = lm(x~1,data=df)
  m1 = lm(x~1+g,data=df)
  training_p = anova(m0,m1)$`Pr(>F)`[2]
  m = lm(x~0+g,data=df)
  K = diag(length(glevels)-1)
  K = cbind(K,-1)
  rownames(K) = glevels[1:nrow(K)]
  pairwise_res = summary(glht(m, linfct = K))$test
  names(pairwise_res$coefficients) = paste0("beta_",names(pairwise_res$coefficients))
  names(pairwise_res$pvalues) = paste0("pvalue_",names(pairwise_res$tstat))
  names(pairwise_res$tstat) = paste0("tscore_",names(pairwise_res$tstat))
  if(length(missing_groups)>0){
    pairwise_res$coefficients[paste0("beta_",missing_groups)]=NA
    pairwise_res$pvalues[paste0("pvalue_",missing_groups)]=NA
    pairwise_res$tstat[paste0("tscore_",missing_groups)]=NA
  }
  v = c(
    training_p = training_p,
    pairwise_res$coefficients,
    pairwise_res$tstat,
    pairwise_res$pvalues
  )
  return(data.frame(t(v),check.names = F))
}

### Stat computations
mt_biomarker_correlations = list()
mt_biomarker_dea = c()
mt_biomarker_dea_lm = c()
for(tissue_name in names(mt_biomarkers)){
  tissue_df = mt_biomarkers[[tissue_name]]
  for(s in c("female","male")){
    x = tissue_df[tissue_df$sex==s,]
    biomarkers = setdiff(names(tissue_df),c("viallabel","bid","group","sex"))
    biomarkers = biomarkers[!apply(is.na(tissue_df[,biomarkers]),2,all)]
    biomarkers = biomarkers[!grepl("clPC",biomarkers)]
    for(biomarker in biomarkers){
      currx = x[,biomarker]
      currg = x[,"group"]
      if(all(is.na(currx))){next}
      inds=!is.na(currx)
      currx = currx[inds];currg=currg[inds]
      dea_res = compute_biomarker_dea_res(currx,currg)
      dea_res$tissue = tissue_name
      dea_res$sex = s
      dea_res$biomarker = biomarker
      mt_biomarker_dea = rbind(mt_biomarker_dea,dea_res)
      dea_res = compute_lm_dea(currx,currg)
      dea_res$tissue = tissue_name
      dea_res$sex = s
      dea_res$biomarker = biomarker
      if(length(mt_biomarker_dea_lm)==0){
        mt_biomarker_dea_lm = rbind(mt_biomarker_dea_lm,dea_res)
      }else{
        mt_biomarker_dea_lm = rbind(mt_biomarker_dea_lm,
                      dea_res[,colnames(mt_biomarker_dea_lm)])
      }
      
    }
  }
  
  biomarkers = setdiff(names(tissue_df),c("viallabel","bid","group","sex"))
  biomarkers = biomarkers[!apply(is.na(tissue_df[,biomarkers]),2,all)]
  biomarkers = biomarkers[!grepl("clPC",biomarkers)]
  if(length(biomarkers)>1){
    biomarkers = sort(biomarkers)
    curr_corrs = get_corrmat(tissue_df[,biomarkers])
    mt_biomarker_correlations[[tissue_name]] = curr_corrs
  }
}

write.csv(
  mt_biomarker_dea,
  file = paste0(output_dir,"mt_biomarker_dea_ttest_pw.csv"),
  row.names = F,quote = F)
write.csv(
  mt_biomarker_dea_lm,
  file = paste0(output_dir,"mt_biomarker_dea_lm.csv"),
  row.names = F,quote = F)

save(mt_biomarkers,
     mt_biomarker_correlations,
     mt_biomarker_dea,
     mt_biomarker_dea_lm,
     file=paste0(data_dir,"mt_biomarkers.RData"))

Load the data instead of recomputing:

load(paste0(data_dir,"mt_biomarkers.RData"))
biomarkers = setdiff(names(mt_biomarkers[[1]]),c("viallabel","bid","group","sex"))

Examine the Adrenal data

Fold changes and p-values:

mt_biomarker_dea[mt_biomarker_dea$tissue=="ADRNL" & mt_biomarker_dea$biomarker=="mt_reads",]
##          kw_p   fc_1w   fc_2w   fc_4w   fc_8w     pval_1w   pval_2w   pval_4w
## 13 0.24568191 -4.7687 -4.4532 -4.9286 -6.4990 0.184920310 0.1908726 0.1314439
## 15 0.01012549  9.6878  4.2518  2.4568  2.7152 0.002380203 0.2623638 0.7021445
##       pval_8w tstat_1w  tstat_2w  tstat_4w  tstat_8w tissue    sex biomarker
## 13 0.03312686 -1.98126 -1.962411 -2.171908 -2.863943  ADRNL female  mt_reads
## 15 0.62972390  4.01898  1.763857  1.019202  1.126400  ADRNL   male  mt_reads
mt_biomarker_dea_lm[mt_biomarker_dea_lm$tissue=="ADRNL" & mt_biomarker_dea_lm$biomarker=="mt_reads",]
##     training_p beta_1w beta_2w beta_4w beta_8w tscore_1w tscore_2w tscore_4w
## 13 0.095783716 -4.7687 -4.4532 -4.9286 -6.4990  -1.98126 -1.962411 -2.171908
## 15 0.009361876  9.6878  4.2518  2.4568  2.7152   4.01898  1.763857  1.019202
##    tscore_8w   pvalue_1w pvalue_2w pvalue_4w  pvalue_8w tissue    sex biomarker
## 13 -2.863943 0.184601080 0.1910585 0.1313307 0.03305748  ADRNL female  mt_reads
## 15  1.126400 0.002435973 0.2621904 0.7021481 0.62978917  ADRNL   male  mt_reads

Simple boxplots

df = mt_biomarkers$ADRNL
df$group = factor(df$group,levels = c("control","1w","2w","4w","8w"))
par(mar=c(5,4,1,1),mfrow=c(1,2))
boxplot(mt_reads~group,data=df[df$sex=="male",],las=2,xlab="",
        main="Mito reads, males",ylab="Percent reads")
boxplot(mt_reads~group,data=df[df$sex=="female",],las=2,xlab="",
        main="Mito reads, females",ylab="Percent reads")

Examine all tissues

symargs = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), 
               symbols = c("****", "***", "**", "*", ""))
for(tissue in names(mt_biomarkers)){
  df = mt_biomarkers[[tissue]]
  df$group = factor(df$group,levels = c("control","1w","2w","4w","8w"))
  if(tissue %in% c("OVARY","TESTES","VENACAVA")){next}
  
  num_plots = 1 # for reads
  if(!all(is.na(df$mtdna))){num_plots=num_plots+1}
  
  df$CL_72_8 = df$`CL(72:8)>CL(18:2/18:2/18:2/18:2)`
  if(all(is.na(df$CL_72_8))){df$CL_72_8=df$`CL(72:8)`}
  if(!all(is.na(df$CL_72_8))){num_plots=num_plots+1}
  
  curr_col = TISSUE_COLORS[tissue]
  if(tissue == "SKMGN"){curr_col = TISSUE_COLORS["SKM-GN"]}
  if(tissue == "SKMVL"){curr_col = TISSUE_COLORS["SKM-VL"]}
  if(tissue == "WATSC"){curr_col = TISSUE_COLORS["WAT-SC"]}
  
  # par(mar=c(5,4,1,1),mfrow=c(num_plots,2))
  # boxplot(mt_reads~group,data=df[df$sex=="male",],las=2,xlab="",
  #       main=paste(tissue,"mito reads, males",sep=","),ylab="Percent reads",col=curr_col)
  # boxplot(mt_reads~group,data=df[df$sex=="female",],las=2,xlab="",
  #       main=paste(tissue,"mito reads, females",sep=","),ylab="Percent reads",col=curr_col)
  
  currplots = list()
  curr_range = c(min(df$mt_reads,na.rm=T),max(df$mt_reads,na.rm=T))
  new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
  new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
  p = ggboxplot(df,x="group",y="mt_reads",fill = curr_col,facet.by = "sex",notch = F) +
  stat_compare_means(method = "anova", label.y = new_max,size=5, label.x = 2)+      # Add global p-value
  stat_compare_means(label = "p.signif", method = "t.test",
    ref.group = "control",symnum.args=symargs,size=6) + ylab("Percent reads") + xlab("") +
    ggtitle(paste(tissue,"mito reads")) + 
    theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5)) 
  p = ggpar(p,ylim = c(curr_range[1],new_max2))
  currplots[["pct"]] = p
  #print(p)
  
  if(!all(is.na(df$mtdna))){
    # boxplot(mtdna~group,data=df[df$sex=="male",],las=2,xlab="",
    #     main=paste(tissue,"mtDNA, males",sep=","),ylab="mtDNA level",col=curr_col)
    # boxplot(mtdna~group,data=df[df$sex=="female",],las=2,xlab="",
    #     main=paste(tissue,"mtDNA, females",sep=","),ylab="mtDNA level",col=curr_col)
    
    curr_range = c(min(df$mtdna,na.rm=T),max(df$mtdna,na.rm=T))
    new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
    new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
    p = ggboxplot(df,x="group",y="mtdna",fill = curr_col,facet.by = "sex",notch = F) + 
      stat_compare_means(method = "anova", label.y = new_max, size=5,label.x = 2)+      # Add global p-value
      stat_compare_means(label = "p.signif", method = "t.test",
        ref.group = "control",symnum.args=symargs,size=6) + ylab("mtDNA level") + xlab("") +
        ggtitle(paste(tissue,"mtDNA")) + 
        theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5)) 
    p = ggpar(p,ylim = c(curr_range[1],new_max2))
    currplots[["mtdna"]] = p
    #print(p)
  }
  
  if(!all(is.na(df$CL_72_8))){
    # boxplot(CL_72_8~group,data=df[df$sex=="male",],las=2,xlab="",
    #     main=paste(tissue,"CL, males",sep=","),ylab="log2 intensity",col=curr_col)
    # boxplot(CL_72_8~group,data=df[df$sex=="female",],las=2,xlab="",
    #     main=paste(tissue,"CL, females",sep=","),ylab="log2 intensity",col=curr_col)
    
    curr_range = c(min(df$CL_72_8,na.rm=T),max(df$CL_72_8,na.rm=T))
    new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
    new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
    p = ggboxplot(df,x="group",y="CL_72_8",fill = curr_col,facet.by = "sex",notch = F) + 
      stat_compare_means(method = "anova",label.y = new_max,size=5,label.x = 2)+      # Add global p-value
      stat_compare_means(label = "p.signif", method = "t.test",
        ref.group = "control",symnum.args=symargs,size=6) + ylab("log2 intensity") + xlab("") +
        ggtitle(paste(tissue,"CL(72:8)")) + 
        theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5))
    p = ggpar(p,ylim = c(curr_range[1],new_max2))
    currplots[["cl"]] = p
    #print(p)
  }
  do.call("grid.arrange", c(currplots, nrow = 1))
}

Visualizations

get_stat_matrix<-function(mt_biomarker_dea,stat_name,biomarkers,tissues){
  m = matrix(NA,nrow=length(tissues),ncol=2*length(biomarkers),
             dimnames = list(tissues,c(paste0(biomarkers,"-male"),paste0(biomarkers,"-female"))))
  for(tissue in tissues){
    for(biomarker in biomarkers){
      for(sex in c("male","female")){
        curr_ind = mt_biomarker_dea$tissue==tissue & mt_biomarker_dea$sex==sex &
             mt_biomarker_dea$biomarker==biomarker
        if(sum(curr_ind)!=1){next}
        m[tissue,paste(biomarker,sex,sep="-")] = mt_biomarker_dea[curr_ind,stat_name]
      }
    }
  }
  return(m)
}

mtdna_pctreads = sapply(mt_biomarker_correlations,function(x)x["mtdna","mt_reads"])
mtdna_pctreads = sort(mtdna_pctreads,decreasing = T)
names(mtdna_pctreads)[names(mtdna_pctreads)=="SKMGN"] = "SKM-GN"
names(mtdna_pctreads)[names(mtdna_pctreads)=="SKMVL"] = "SKM-VL"
names(mtdna_pctreads)[names(mtdna_pctreads)=="WATSC"] = "WAT-SC"
barplot(mtdna_pctreads,las=2,col=TISSUE_COLORS[names(mtdna_pctreads)],ylab = "Pearson correlation")
abline(h=0.5,lty=2,lwd=2)

tissues_with_cls = names(mt_biomarker_correlations)[sapply(mt_biomarker_correlations,nrow)>2]
corrgplots = list()
for(tissue_name in tissues_with_cls){
  m = mt_biomarker_correlations[[tissue_name]]
  rownames(m) = sapply(rownames(m),function(x)strsplit(x,split=">")[[1]][1])
  colnames(m) = rownames(m)
  m_bin = abs(m) < 0.5
  mode(m_bin) = "numeric"
  p = ggcorrplot(m,p.mat=m_bin,title = tissue_name,method = "circle")
  corrgplots[[tissue_name]] = p
}
do.call("grid.arrange", c(corrgplots[c("WATSC","LUNG")], ncol = 1))

sapply(corrgplots[c("LIVER","HEART","KIDNEY","SKMGN")],print)

##             LIVER   HEART   KIDNEY  SKMGN  
## data        List,7  List,7  List,7  List,7 
## layers      List,2  List,2  List,2  List,2 
## scales      ?       ?       ?       ?      
## mapping     List,3  List,3  List,3  List,3 
## theme       List,93 List,93 List,93 List,93
## coordinates ?       ?       ?       ?      
## facet       ?       ?       ?       ?      
## plot_env    ?       ?       ?       ?      
## labels      List,5  List,5  List,5  List,5 
## guides      List,1  List,1  List,1  List,1
tissues = sort(unique(mt_biomarker_dea$tissue))
cls = biomarkers[grepl("CL",biomarkers)]
non_cls = setdiff(biomarkers,cls)
tissues = setdiff(tissues,c("OVARY","TESTES"))

mt_biomarker_dea$biomarker[mt_biomarker_dea$biomarker == "CL(72:8)>CL(18:2/18:2/18:2/18:2)"] = "CL(72:8)"
sort(table(mt_biomarker_dea$biomarker))
## 
##                                                     CL(70:4) 
##                                                            2 
##                                                     CL(70:5) 
##                                                            2 
##                             CL(70:7)>CL(16:1_18:2_18:2_18:2) 
##                                                            2 
##                                                    CL(72:10) 
##                                                            2 
##                                                     CL(72:9) 
##                                                            2 
##                                                    CL(74:10) 
##                                                            2 
##                            CL(74:10)>CL(18:2_18:2_18:2_20:4) 
##                                                            2 
##                                                   CL(74:10)A 
##                                                            2 
##                                                     CL(74:7) 
##                                                            2 
##                                                     CL(74:8) 
##                                                            2 
## CL(74:8)>CL(18:1_18:2_18:2_20:3)_and_CL(18:2_18:2_18:2_20:2) 
##                                                            2 
## CL(74:8)>CL(18:2_18:2_18:2_20:2)_and_CL(18:1_18:2_18:2_20:3) 
##                                                            2 
## CL(74:9)>CL(18:1_18:2_18:2_20:4)_and_CL(18:2_18:2_18:2_20:3) 
##                                                            2 
##                                                    CL(76:11) 
##                                                            2 
##                            CL(76:11)>CL(18:1_18:2_18:2_22:6) 
##                                                            2 
##                                                    CL(76:13) 
##                                                            2 
##                                                     CL(70:6) 
##                                                            4 
##                                                     CL(72:6) 
##                                                            4 
##                            CL(74:11)>CL(18:2_18:2_18:2_20:5) 
##                                                            4 
##                                                     CL(70:7) 
##                                                            6 
##                             CL(72:6)>CL(18:1_18:1_18:2_18:2) 
##                                                            6 
##                                                     CL(72:7) 
##                                                            6 
##                             CL(72:7)>CL(18:1_18:2_18:2_18:2) 
##                                                            6 
##                            CL(74:10)>CL(18:1_18:2_18:2_20:5) 
##                                                            6 
##                                                    CL(76:12) 
##                                                            6 
##                                                    CL(74:11) 
##                                                            8 
##                                                     CL(74:9) 
##                                                            8 
##                                                     CL(72:8) 
##                                                           12 
##                                                        mtdna 
##                                                           28 
##                                                     mt_reads 
##                                                           34
biomarkers = c("mtdna","mt_reads","CL(74:9)","CL(74:11)","CL(72:8)")
mt_group_kw = get_stat_matrix(mt_biomarker_dea,"kw_p",biomarkers,tissues)
corrplot(t(-log10(mt_group_kw)),is.corr = F,method = "circle",
         p.mat = t(mt_group_kw),sig.level = 0.05)

par(mar=c(8,8,8,8))
tissue_counts_kw = colSums(mt_group_kw<0.05,na.rm=T)
barplot(sort(tissue_counts_kw,decreasing = T),las=2,col="white",
        ylab = "Num tissues with p<0.05")

fc8w = get_stat_matrix(mt_biomarker_dea,"fc_8w",biomarkers,tissues)
p8w = get_stat_matrix(mt_biomarker_dea,"pval_8w",biomarkers,tissues)
tstats = get_stat_matrix(mt_biomarker_dea,"tstat_8w",biomarkers,tissues)
corrplot(t(tstats),method = "circle",is.corr = F,p.mat = t(p8w),sig.level = 0.05)

par(mar=c(8,8,8,8))
tissue_counts_8w = colSums(p8w<0.05,na.rm=T)
barplot(sort(tissue_counts_8w,decreasing = T),las=2,col="white",ylab = "Num tissues with p<0.05")

fc1w = get_stat_matrix(mt_biomarker_dea,"fc_1w",biomarkers,tissues)
p1w = get_stat_matrix(mt_biomarker_dea,"pval_1w",biomarkers,tissues)
tstats1w = get_stat_matrix(mt_biomarker_dea,"tstat_1w",biomarkers,tissues)
corrplot(t(tstats1w),method = "circle",is.corr = F,p.mat = t(p1w),sig.level = 0.05)

par(mar=c(8,8,8,8))
tissue_counts_1w = colSums(p1w<0.05,na.rm=T)
barplot(sort(tissue_counts_1w,decreasing = T),las=2,col="white",
        ylab = "Num tissues with p<0.05")